This notebook…
This notebook requires…
# tidyverse packages
library(dplyr) # data wrangling
library(readr) # read/write tabular data
library(stringr) # work with strings
library(tidyr) # data wrangling
# spatial packages
library(janitor) # data wrangling
library(sf) # spatial data tools
library(mapview) # projections
library(tigris) # census data wrangling
# other packages
library(here) # file path management
library(naniar) # missing data
library(ggplot2)
library(viridis) # palettes
library(leaflet)
library(tidycensus)
library(sp)
library(ggplot2)
library(RColorBrewer) # color palettes
library(viridis) # color palettes
loading asthma data from MoPhim data base asthma is for 2015
asthma <- read_csv(here("data", "asthma.csv"))
Missing column names filled in: 'X3' [3]Parsed with column specification:
cols(
`Title:` = [31mcol_character()[39m,
`Missouri EPHT Asthma` = [31mcol_character()[39m,
X3 = [33mcol_logical()[39m
)
1031 parsing failures.
row col expected actual file
1 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma.csv'
2 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma.csv'
3 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma.csv'
4 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma.csv'
5 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma.csv'
... ... ......... ......... ............................................................................
See problems(...) for more details.
Getting rid of first three rows bc they are pointless text.
asthma %>%
slice(4:1029) -> asthma_sliced
Getting rid of third col with no data. aslo renaming cols to ‘count’ and ‘zip’
asthma_sliced %>%
select(-X3) %>%
rename(count = `Missouri EPHT Asthma`) %>%
rename(zip = `Title:`)-> asthma_sliced
Changing X to NA, making the count variable numeric
mutate(asthma_sliced, count = ifelse(count == "x", NA, count)) -> asthma_cleaned
mutate(asthma_sliced, count = as.numeric(count)) -> asthma_cleaned
NAs introduced by coercion
`
loading Mo zip codes
moZip <- zctas(state = "Missouri", year = 2010,
cb = FALSE, class ="sf")
Using FIPS code '29' for state 'Missouri'
ZCTAs can take several minutes to download. To cache the data and avoid re-downloading in future R sessions, set `options(tigris_use_cache = TRUE)`
Changing object to SF
st_as_sf(moZip) -> moZip
cleaning names, selecting just cols for zip and geometry, changing col name to zip
moZip %>%
clean_names() %>%
select(zcta5ce10, geometry) %>%
rename(zip = `zcta5ce10`)-> moZip_clean
loading data for asthma 2014
asthma_14<- read_csv(here("data", "asthma2014.csv"))
Missing column names filled in: 'X3' [3]Parsed with column specification:
cols(
`Title:` = [31mcol_character()[39m,
`Missouri EPHT Asthma` = [31mcol_character()[39m,
X3 = [33mcol_logical()[39m
)
1031 parsing failures.
row col expected actual file
1 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2014.csv'
2 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2014.csv'
3 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2014.csv'
4 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2014.csv'
5 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2014.csv'
... ... ......... ......... ................................................................................
See problems(...) for more details.
getting rid of first three rows bc they have nothing in them
asthma_14 %>%
slice(4:1029) -> asthma14_sliced
getting rid of empty third col, renaming remaning cols
asthma14_sliced %>%
select(-X3) %>%
rename(count = `Missouri EPHT Asthma`) %>%
rename(zip = `Title:`) -> asthma14_sliced
changing X to NA, changing to numeric
mutate(asthma14_sliced, count = ifelse(count == "x", NA, count)) -> asthma14_cleaned
mutate(asthma14_sliced, count = as.numeric(count)) -> asthma14_cleaned
NAs introduced by coercion
left join to asthma_cleaned by zip. Now table has data for both 2014 and 2015
left_join(asthma_cleaned, asthma14_cleaned, by = "zip") -> joined_14_15
loading asthma data for 2013
asthma_2013 <- read_csv(here("data", "asthma2013.csv"))
Missing column names filled in: 'X3' [3]Parsed with column specification:
cols(
`Title:` = [31mcol_character()[39m,
`Missouri EPHT Asthma` = [31mcol_character()[39m,
X3 = [33mcol_logical()[39m
)
1031 parsing failures.
row col expected actual file
1 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2013.csv'
2 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2013.csv'
3 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2013.csv'
4 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2013.csv'
5 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2013.csv'
... ... ......... ......... ................................................................................
See problems(...) for more details.
Getting rid of first 4 rows bc they have no data
asthma_2013 %>%
slice(4:1029) -> asthma13_sliced
getting rid of third col, renaming other cols
asthma13_sliced %>%
select(-X3) %>%
rename(count_13 = `Missouri EPHT Asthma`) %>%
rename(zip = `Title:`) -> asthma13_sliced
changing X to NA, changing count to numeric
mutate(asthma13_sliced, count_13 = ifelse(count_13 == "x", NA, count_13)) -> asthma13_cleaned
mutate(asthma13_sliced, count_13 = as.numeric(count_13)) -> asthma13_cleaned
NAs introduced by coercion
left joing to joined_14_15, so table now has data for years 2013, 2014, and 2015
left_join(joined_14_15, asthma13_cleaned, by = "zip") -> joined_13_14_15
loading data for 2012
asthma_2012 <- read_csv(here("data", "asthma2012.csv"))
getting rid of empty cols
asthma_2012 %>%
slice(4:1029) -> asthma12_sliced
cleaning data
asthma12_sliced %>%
select(-X3) %>%
rename(count_12 = `Missouri EPHT Asthma`) %>%
rename(zip = `Title:`) -> asthma12_sliced
Error in is_character(x) : object 'X3' not found
changing X to NA and changing count to numeric
mutate(asthma12_sliced, count_12 = ifelse(count_12 == "x", NA, count_12)) -> asthma12_cleaned
mutate(asthma12_sliced, count_12 = as.numeric(count_12)) -> asthma12_cleaned
NAs introduced by coercion
left joing to joined_13_14_15 by zip. Table now has data for 2012 through 2015
left_join(joined_13_14_15, asthma12_cleaned, by = "zip") -> joined12_15
loading data for 2011
asthma_2011 <- read_csv(here("data", "asthma2011.csv"))
Missing column names filled in: 'X3' [3]Parsed with column specification:
cols(
`Title:` = [31mcol_character()[39m,
`Missouri EPHT Asthma` = [31mcol_character()[39m,
X3 = [33mcol_logical()[39m
)
1031 parsing failures.
row col expected actual file
1 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2011.csv'
2 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2011.csv'
3 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2011.csv'
4 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2011.csv'
5 -- 3 columns 4 columns '/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/data/asthma2011.csv'
... ... ......... ......... ................................................................................
See problems(...) for more details.
getting rid of empty cols
asthma_2011 %>%
slice(4:1029) -> asthma11_sliced
cleaning data
asthma11_sliced %>%
select(-X3) %>%
rename(count_11 = `Missouri EPHT Asthma`) %>%
rename(zip = `Title:`) -> asthma11_sliced
changing X to NA, and changing count to be numeric
mutate(asthma11_sliced, count_11 = ifelse(count_11 == "x", NA, count_11)) -> asthma11_cleaned
mutate(asthma11_sliced, count_11 = as.numeric(count_11)) -> asthma11_cleaned
NAs introduced by coercion
joining to joined_12_15 for years 2011-2015
left_join(joined12_15, asthma11_cleaned, by = "zip") -> joined11_15
joining data with geometry by zip
asthma_full_11_15 <- left_join(moZip_clean, joined11_15, by = "zip")
asthma_full_11_15 %>%
rename(
count_14 = count.y,
count_15 = count.x
) -> asthma_full_11_15
filtering for Zip codes in St Louis Metro Area, first filter gets range, Exclude gets rid of zip codes in that range that aren’t included in the metro area.
asthma_full_11_15 %>%
filter(zip >= (63005))%>%
filter(zip <= (63390)) -> asthma_metro
exclude <- c(63091, 63155, 63333, 63334, 63336, 63339, 63344, 63345, 63350, 63351, 63352, 63353, 63359, 63359, 63361, 63363, 63382, 63388, 63384, 63036, 63087, 63330)
asthma_metro %>%
filter(as.character(zip) %in% exclude == FALSE) -> asthma_metro_2
changing NA’s to 0s
asthma_metro_2 %>%
mutate(count_11 = ifelse(is.na(count_11) == TRUE, 0, count_11)) %>%
mutate(count_12 = ifelse(is.na(count_12) == TRUE, 0, count_12)) %>%
mutate(count_13 = ifelse(is.na(count_13) == TRUE, 0, count_13)) %>%
mutate(count_14 = ifelse(is.na(count_14) == TRUE, 0, count_14)) %>%
mutate(count_15 = ifelse(is.na(count_15) == TRUE, 0, count_15))-> asthma_metro_2
Creating count for all 5 years
asthma_metro_2 %>%
group_by(zip) %>%
mutate(total = sum(count_15, count_14, count_13, count_12, count_11)) %>%
select(zip, total, geometry) -> asthma_count_total
rowwise() %>% getting census data to get population for zip code tracts. using 5 year estimates from 2015
acs <- load_variables(year = 2015, dataset = "acs5", cache = TRUE)
pop <- get_acs(geography = "zip code tabulation area", year = 2015, variables = "B01003_001", survey = "acs5")
Getting data from the 2011-2015 5-year ACS
filtering for zip codes in st. louis metro area
pop %>%
filter(GEOID >= (63005))%>%
filter(GEOID <= (63390)) ->pop
pop %>%
filter(as.character(GEOID) %in% exclude == FALSE)%>%
rename(zip = GEOID,
pop = estimate) %>%
select(zip, pop) -> pop_metro
left join pop_metro to asthma_count_total, making variable for count
left_join(asthma_count_total, pop_metro, by = "zip") %>%
mutate(count = (total/pop)*1000) %>%
select(zip, count, geometry) -> count_by_pop
mapview(count_by_pop)
creating ggplot map
ggplot() +
geom_sf(data = count_by_pop, mapping = aes(fill = count)) +
scale_fill_distiller(palette = "Greens", trans ="reverse")
write.csv(count_by_pop, here("csv", "asthmaCount11_15.csv"))
loading data for year 2015 (five year estimate)
aa <- get_acs(geography = "zip code tabulation area", year = 2015, variables = "B02009_001", survey = "acs5")
Getting data from the 2011-2015 5-year ACS
Joining with pop data
aa <- left_join(aa, pop, by = "GEOID")
normalizing by pop
aa %>%
mutate(percent = estimate.x/estimate.y) %>%
rename(zip = GEOID) -> aa
join with asthma data
aa_metro <- left_join(count_by_pop, aa, by = "zip")
saving csv file for % african american
write.csv(aa_metro, here("csv", "percent_aa.csv"))
saving as shapefile
dir.create(here("csv", "precent_aa"))
st_write(aa_metro, dsn = here("csv","precent_aa", "precent_aa.shp"), delete_dsn = TRUE)
GDAL Error 1: /Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/precent_aa/precent_aa.shp does not appear to be a file or directory.
Deleting source `/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/precent_aa/precent_aa.shp' failed
Writing layer `precent_aa' to data source `/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/precent_aa/precent_aa.shp' using driver `ESRI Shapefile'
features: 127
fields: 11
geometry type: Multi Polygon
loading five year estimate for 2015
median_income <- get_acs(geography = "zip code tabulation area", year = 2015, variables = "B06011_001", survey = "acs5")
join with pop data
median_income <- left_join(median_income, pop, by = "GEOID")
normalize by pop
median_income%>%
mutate(percent = estimate.x/estimate.y) %>%
rename(zip = GEOID) -> median_income
join with asthma data
median_income <- left_join(count_by_pop, median_income, by = "zip")
saving csv file for median income
write.csv(median_income, here("csv", "median_income.csv"))
saving as shapefile
dir.create(here("csv", "median_income"))
st_write(median_income, dsn = here("csv","median_income", "median_income.shp"), delete_dsn = TRUE)
GDAL Error 1: /Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/median_income/median_income.shp does not appear to be a file or directory.
Deleting source `/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/median_income/median_income.shp' failed
Writing layer `median_income' to data source `/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/median_income/median_income.shp' using driver `ESRI Shapefile'
features: 127
fields: 11
geometry type: Multi Polygon
loading 5 year estimate from 2015
home_value <- get_acs(geography = "zip code tabulation area", year = 2015, variables = "B25077_001", survey = "acs5")
Getting data from the 2011-2015 5-year ACS
joining with pop data
home_value <- left_join(home_value, pop, by = "GEOID")
normalizing by pop
home_value%>%
mutate(percent = estimate.x/estimate.y) %>%
rename(zip = GEOID) -> home_value
joining with asthma data
home_metro <- left_join(count_by_pop, home_value, by = "zip")
Saving csv file for median home value
write.csv(home_metro, here("csv", "median_home.csv"))
saving as shapefile
dir.create(here("csv", "home_value"))
st_write(home_metro, dsn = here("csv","home_value", "home_value.shp"), delete_dsn = TRUE)
GDAL Error 1: /Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/home_value/home_value.shp does not appear to be a file or directory.
Deleting source `/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/home_value/home_value.shp' failed
Writing layer `home_value' to data source `/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/home_value/home_value.shp' using driver `ESRI Shapefile'
features: 127
fields: 11
geometry type: Multi Polygon
loading five year estimate from 2015
medicaid <- get_acs(geography = "zip code tabulation area", year = 2015, variables = "B992707_001", survey = "acs5")
joining with pop
medicaid <- left_join(medicaid, pop, by = "GEOID")
normalizing by pop
medicaid %>%
mutate(percent = estimate.x/estimate.y) %>%
rename(zip = GEOID) -> medicaid
joining with asthma data
med_metro <- left_join(count_by_pop, medicaid, by = "zip")
saving csv file
write.csv(med_metro, here("csv", "medicaid.csv"))
saving as shapefile
dir.create(here("csv", "medicaid"))
st_write(med_metro, dsn = here("csv","medicaid", "medicaid.shp"), delete_dsn = TRUE)
GDAL Error 1: /Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/medicaid/medicaid.shp does not appear to be a file or directory.
Deleting source `/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/medicaid/medicaid.shp' failed
Writing layer `medicaid' to data source `/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/medicaid/medicaid.shp' using driver `ESRI Shapefile'
features: 127
fields: 10
geometry type: Multi Polygon
loading five year estimates for 2015
poverty <- get_acs(geography = "zip code tabulation area", year = 2015, variables = "B17001_002", survey = "acs5")
loading poverty total
poverty_total <- get_acs(geography = "zip code tabulation area", year = 2015, variables = "B17001_001", survey = "acs5")
poverty %>%
select(GEOID, estimate) -> poverty
poverty <- left_join(poverty, poverty_total, by = "GEOID")
normalizing by povery total
poverty %>%
mutate(density = estimate.x/estimate.y) %>%
rename(zip = GEOID) -> poverty
joining with asthma data
poverty_metro <- left_join(count_by_pop, poverty, by = "zip")
saving as csv file
write.csv(poverty_metro, here("csv", "poverty.csv"))
dir.create(here("csv", "poverty"))
st_write(poverty_metro, dsn = here("csv","poverty", "poverty.shp"), delete_dsn = TRUE)
GDAL Error 1: /Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/poverty/poverty.shp does not appear to be a file or directory.
Deleting source `/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/poverty/poverty.shp' failed
Writing layer `poverty' to data source `/Users/avagagner/Desktop/Courses/Fall 2019/Research/Asthma/csv/poverty/poverty.shp' using driver `ESRI Shapefile'
features: 127
fields: 8
geometry type: Multi Polygon